# Toggle between the two implementations of the per-(geo, date) regressions
# used later in this notebook: TRUE fits them with parallel::mclapply, FALSE
# fits them sequentially with dplyr::group_modify.
PARCOMPUTE <- TRUE

# Number of worker processes handed to mclapply. parallel::detectCores() is
# documented to return NA when the number of cores cannot be determined, so
# fall back to a single core in that case instead of passing NA along.
N_CORE <- parallel::detectCores()
if (is.na(N_CORE)) {
  N_CORE <- 1L
}

Background

In this notebook, we repeat the analysis of 02_temporal_heterogeneity.Rmd for all of our core indicators.

Data setup

# Sources and signals to fetch from the API, written one row per
# (indicator, target) pairing:
#   data source | signal | display name | target the indicator is scored against
# The four parallel vectors used by the rest of the notebook are the columns
# of this table, so they always stay aligned.
# TODO: Add Google Symptoms "eventually"
indicator_spec <- rbind(
  c("doctor-visits", "smoothed_adj_cli",
    "Doctor visits", "Cases"),
  c("fb-survey", "smoothed_cli",
    "Facebook CLI", "Cases"),
  c("fb-survey", "smoothed_hh_cmnty_cli",
    "Facebook CLI-in-community", "Cases"),
  c("hospital-admissions", "smoothed_adj_covid19_from_claims",
    "Hospitalizations", "Cases"),
  c("hospital-admissions", "smoothed_adj_covid19_from_claims",
    "Hospitalizations", "Deaths")
)
source_names <- indicator_spec[, 1]
signal_names <- indicator_spec[, 2]
pretty_names <- indicator_spec[, 3]
target_names <- indicator_spec[, 4]
# Geographic resolution for every fetch below ('county' and 'state' are the
# values handled later in this notebook).
# NOTE(review): `params` is not defined in the visible code; presumably it is
# the R Markdown parameter list -- confirm that `geo_value` is the intended
# parameter name for a geo *level*.
geo_level <- params$geo_value

start_day <- "2020-04-15"
# NULL end day is passed straight through to covidcast_signal; whatever it
# returns on the first run is then frozen in the cache file below.
end_day <- NULL
cache_fname <- sprintf('cached_data/12_heterogeneity_core_indicators_%s.RDS',
                       geo_level)

if (!file.exists(cache_fname)) {
  # One API call per entry of source_names/signal_names, kept in the same
  # order so that df_signals[[i]] lines up with target_names[i] etc.
  df_signals <- vector("list", length(signal_names))
  for (i in seq_along(signal_names)) {
    df_signals[[i]] <- suppressWarnings(
      covidcast_signal(source_names[i], signal_names[i],
                       start_day, end_day,
                       geo_type = geo_level)
    )
  }

  # Fetch USAFacts confirmed case incidence proportion (smoothed with 7-day
  # trailing average)
  df_cases <- suppressWarnings(
    covidcast_signal("usa-facts", "confirmed_7dav_incidence_prop",
                     start_day, end_day,
                     geo_type = geo_level)
  )

  # Same for USAFacts death incidence proportion.
  df_deaths <- suppressWarnings(
    covidcast_signal("usa-facts", "deaths_7dav_incidence_prop",
                     start_day, end_day,
                     geo_type = geo_level)
  )

  # Make sure the cache directory exists so the save cannot fail after all
  # of the downloads have already been paid for.
  dir.create(dirname(cache_fname), showWarnings = FALSE, recursive = TRUE)
  # Positional layout (signals, cases, deaths) is what the else-branch reads.
  saveRDS(list(df_signals, df_cases, df_deaths), cache_fname)
} else {
  cached_data <- readRDS(cache_fname)
  df_signals <- cached_data[[1]]
  df_cases <- cached_data[[2]]
  df_deaths <- cached_data[[3]]
}

# Minimum cumulative confirmed case count on 2020-11-01 that a county needs
# in order to be kept in the analysis.
case_num <- 500

# Locations to analyse:
#   * county: counties whose USAFacts cumulative confirmed cases on
#     2020-11-01 reach case_num (this lookup is not cached and queries the
#     API on every run);
#   * state: every location present in the first fetched indicator.
if (geo_level == 'county') {
  geo_values <- suppressWarnings(
    covidcast_signal("usa-facts", "confirmed_cumulative_num",
                     '2020-11-01',
                     '2020-11-01',
                     geo_type = 'county')
  ) %>%
    filter(value >= case_num) %>%
    pull(geo_value)
} else if (geo_level == 'state') {
  geo_values <- unique(df_signals[[1]]$geo_value)
} else {
  # Without this branch geo_values would stay undefined and the failure
  # would only surface later, far from its cause.
  stop(sprintf("Unsupported geo_level '%s'; expected 'county' or 'state'.",
               geo_level))
}

Setup

# Training windows for the time-series sensorization, each given as
# c(first_offset, last_offset) in days relative to the date being sensorized.
# Every window ends 8 days before that date; only the look-back length varies.
sensorize_time_ranges <- lapply(
  c(-42, -49, -56, -63, -70),
  function(window_start) c(window_start, -8)
)

# Wrap a data frame holding geo_value, time_value and sensorized_value in the
# column layout and class used for the fetched signals, so that it can be
# passed to covidcast_cor. The geo_type attributes follow the notebook's
# geo_level instead of being hard-coded to 'county'.
as_sensorized_signal <- function(df, geo_type) {
  out <- df %>% transmute(
    geo_value = geo_value,
    signal = 'ind_sensorized',
    time_value = time_value,
    direction = NA,
    issue = lubridate::ymd('2020-11-01'),
    lag = NA,
    value = sensorized_value,
    stderr = NA,
    sample_size = NA,
    data_source = 'linear_sensorization'
  )
  attr(out, "geo_type") <- geo_type
  attr(out, "metadata") <- list(geo_type = geo_type)
  class(out) <- c("covidcast_signal", "data.frame")
  out
}

# All result caches live under results/; create it up front so saveRDS cannot
# fail after the expensive fits.
dir.create('results', showWarnings = FALSE, recursive = TRUE)

# For every (indicator, target) pairing:
#   1. fit one regression of target on indicator per location over all dates
#      ("spatial" sensorization);
#   2. for each training window, fit one regression per (location, date) on
#      the dates inside the window ("TS" sensorization);
#   3. compute per-day Spearman correlations of the raw and the sensorized
#      indicator with the target, cache them, and plot them.
for (ind_idx in seq_along(source_names)) {
  if (target_names[ind_idx] == 'Cases') {
    df_target <- df_cases
  } else if (target_names[ind_idx] == 'Deaths') {
    df_target <- df_deaths
  } else {
    stop(sprintf("No matching dataframe for target %s.", target_names[ind_idx]))
  }

  # Pair indicator and target values by location and day, restricted to the
  # selected locations.
  ind_df <- tibble(df_signals[[ind_idx]]) %>% filter(geo_value %in% geo_values)
  ind_target <- inner_join(ind_df, tibble(df_target),
                           by = c('geo_value', 'time_value')) %>%
    select(
      geo_value,
      time_value,
      indicator_value = value.x,
      target_value = value.y
    )

  # "Spatial" sensorization: one linear fit per location using all of its
  # dates; the fitted values replace the raw indicator.
  ind_global_sensorized <- ind_target %>%
    group_by(geo_value) %>%
    group_modify(~ {
      fit <- lm(target_value ~ indicator_value, data = .x)
      tibble(time_value = .x$time_value,
             indicator_value = .x$indicator_value,
             target_value = .x$target_value,
             sensorized_value = fit$fitted.values)
    }) %>%
    ungroup()
  df_global_sensorized <- as_sensorized_signal(ind_global_sensorized, geo_level)

  base_cor_fname <- sprintf('results/12_base_cors_%s_%s_%s_%s.RDS',
                            geo_level,
                            source_names[ind_idx], signal_names[ind_idx],
                            target_names[ind_idx])
  if (!file.exists(base_cor_fname)) {
    # NOTE(review): the raw correlation uses the unfiltered signal, whereas
    # the sensorized one is restricted to geo_values -- confirm this
    # asymmetry is intended.
    df_cor_base_ind <- covidcast_cor(df_signals[[ind_idx]], df_target,
                                     by = 'time_value', method = 'spearman')
    df_cor_sensorized_ind <- covidcast_cor(df_global_sensorized, df_target,
                                           by = 'time_value',
                                           method = 'spearman')
    df_cor_base <- rbind(df_cor_base_ind, df_cor_sensorized_ind)
    df_cor_base$Indicator <- as.factor(c(rep('Raw', nrow(df_cor_base_ind)),
                                         rep('Sensorized (Spatial)',
                                             nrow(df_cor_sensorized_ind))))
    saveRDS(df_cor_base, base_cor_fname)
  } else {
    df_cor_base <- readRDS(base_cor_fname)
  }

  sensorize_fname <- sprintf('results/12_sensorize_cors_%s_%s_%s_%s.RDS',
                             geo_level,
                             source_names[ind_idx], signal_names[ind_idx],
                             target_names[ind_idx])
  sensorize_val_fname <- sprintf('results/12_sensorize_vals_%s_%s_%s_%s.RDS',
                                 geo_level,
                                 source_names[ind_idx], signal_names[ind_idx],
                                 target_names[ind_idx])
  # Both files are read back in the else-branch, so recompute unless both
  # are present.
  if (!file.exists(sensorize_fname) || !file.exists(sensorize_val_fname)) {
    sensorize_cors <- vector('list', length(sensorize_time_ranges))
    ind_target_sensorized_list <- vector('list', length(sensorize_time_ranges))
    for (outer_idx in seq_along(sensorize_time_ranges)) {
      sensorize_llim <- sensorize_time_ranges[[outer_idx]][1]
      sensorize_ulim <- sensorize_time_ranges[[outer_idx]][2]

      # sensorize_llim is negative, so the first sensorized date is the first
      # one whose whole training window starts on or after start_day.
      min_sensorize_date <- lubridate::ymd(start_day) - sensorize_llim
      max_sensorize_date <- max(ind_target$time_value)
      sensorize_date_offsets <- 0:(max_sensorize_date - min_sensorize_date)

      # Lookup table with one row per (sensorize_date, training day in its
      # window); joining on time_value attaches every observation to each
      # date whose training window contains it.
      joiner_df <- bind_rows(lapply(sensorize_date_offsets, function(dt) {
        sensorize_date <- min_sensorize_date + dt
        tibble(sensorize_date = sensorize_date,
               time_value = sensorize_date + sensorize_llim:sensorize_ulim)
      }))

      # `by` (not `on`) is the join-key argument of inner_join; time_value is
      # the only column the two tables share.
      ind_windowed <- ind_target %>%
        inner_join(joiner_df, by = 'time_value') %>%
        group_by(geo_value, sensorize_date)

      # One regression per (location, sensorize_date); both branches yield
      # the broom::tidy coefficient rows tagged with the group keys.
      if (!PARCOMPUTE) {
        ind_sensorized_lm <- ind_windowed %>%
          group_modify(
            ~ broom::tidy(lm(target_value ~ indicator_value, data = .x))
          ) %>%
          ungroup()
      } else {
        ind_grouped_list <- group_split(ind_windowed)
        ind_sensorized_lm <- parallel::mclapply(ind_grouped_list, function(df) {
          broom::tidy(
            lm(target_value ~ indicator_value, data = df)
          ) %>% mutate(
            geo_value = unique(df$geo_value),
            sensorize_date = unique(df$sensorize_date)
          )
        }, mc.cores = N_CORE) %>% bind_rows()
      }

      # One row per (location, date) with columns `intercept` and `slope`.
      ind_sensorized_wide <- ind_sensorized_lm %>%
        select(geo_value, sensorize_date, term, estimate) %>%
        mutate(
          term = ifelse(term == '(Intercept)', 'intercept', 'slope')
        ) %>%
        pivot_wider(
          id_cols = c(geo_value, sensorize_date),
          names_from = term,
          values_from = estimate
        )

      # Apply each date's coefficients to that same date's indicator value.
      ind_target_sensorized <- ind_target %>%
        inner_join(
          ind_sensorized_wide,
          by = c('time_value' = 'sensorize_date',
                 'geo_value')
        ) %>%
        mutate(
          sensorized_value = intercept + indicator_value * slope
        )
      df_sensorized <- as_sensorized_signal(ind_target_sensorized, geo_level)

      df_cor_sensorized_ind <- covidcast_cor(df_sensorized, df_target,
                                             by = 'time_value',
                                             method = 'spearman')
      df_cor_sensorized_ind$Indicator <- sprintf('Sensorized (TS, %d:%d)',
                                                 sensorize_llim,
                                                 sensorize_ulim)
      sensorize_cors[[outer_idx]] <- df_cor_sensorized_ind
      ind_target_sensorized_list[[outer_idx]] <- ind_target_sensorized
    }

    saveRDS(sensorize_cors, sensorize_fname)
    saveRDS(ind_target_sensorized_list, sensorize_val_fname)
  } else {
    sensorize_cors <- readRDS(sensorize_fname)
    ind_target_sensorized_list <- readRDS(sensorize_val_fname)
  }

  # Stack all correlation series and shorten the labels for the legend; the
  # factor levels fix the legend order (raw, spatial, then each TS window).
  df_cor <- bind_rows(df_cor_base, sensorize_cors)
  df_cor$Indicator <- stringr::str_replace(df_cor$Indicator,
                                           'Sensorized ',
                                           "")
  ts_labels <- vapply(sensorize_time_ranges,
                      function(x) sprintf("(TS, %d:%d)", x[[1]], x[[2]]),
                      character(1))
  df_cor$Indicator <- factor(df_cor$Indicator,
                             levels = c('Raw', "(Spatial)", ts_labels))

  plt <- ggplot(df_cor, aes(x = time_value, y = value)) +
    geom_line(aes(color = Indicator)) +
    labs(title = sprintf("Correlation between %s and %s",
                         pretty_names[ind_idx],
                         target_names[ind_idx]),
         subtitle = "Per day",
         x = "Date", y = "Correlation") +
    theme(legend.position = "bottom")
  print(plt)
}
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Warning: Removed 287 row(s) containing missing values (geom_path).
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"

## Warning: Removed 281 row(s) containing missing values (geom_path).
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"

## Warning: Removed 281 row(s) containing missing values (geom_path).
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"

## Warning: Removed 287 row(s) containing missing values (geom_path).
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"

## Warning: Removed 287 row(s) containing missing values (geom_path).

# Quantiles of the fitted slopes that bound the visible y-range of each plot.
QUANTS <- c(0.01, 0.99)

# TODO: Add more "core indicators"

# For every (indicator, target) pairing and every training window, print a
# summary of the fitted slopes and plot them over time, together with their
# per-day median and median +/- MAD.
for (ind_idx in seq_along(source_names)) {
  # Only the cached sensorized values (which carry the fitted slope per
  # location and date) are needed here.
  sensorize_val_fname <- sprintf('results/12_sensorize_vals_%s_%s_%s_%s.RDS',
                                 geo_level,
                                 source_names[ind_idx], signal_names[ind_idx],
                                 target_names[ind_idx])
  sensorized_vals <- readRDS(sensorize_val_fname)

  for (inner_idx in seq_along(sensorize_time_ranges)) {
    sv <- sensorized_vals[[inner_idx]]
    print(summary(sv$slope))
    print(slope_limits <- quantile(sv$slope, QUANTS, na.rm = TRUE))
    plt <- ggplot(sv, aes(x = time_value, y = slope)) +
      geom_point(alpha = 0.1, size = 0.5) +
      geom_hline(yintercept = 0, colour = 'white') +
      # group = 1 joins the per-day summaries into a single line.
      stat_summary(
        aes(group = 1, colour = 'median'),
        fun = median,
        geom = "line"
      ) +
      stat_summary(
        aes(group = 1, colour = '+/- mad'),
        fun = function(x) { median(x) + mad(x) },
        geom = "line"
      ) +
      stat_summary(
        aes(group = 1, colour = '+/- mad'),
        fun = function(x) { median(x) - mad(x) },
        geom = "line"
      ) +
      scale_colour_manual(
        values = c("median" = "maroon",
                   "+/- mad" = "darkgreen")
      ) +
      labs(colour = '') +
      ggtitle(
        sprintf("Slope distribution for %s[%s], fitted on t in %d:%d",
                pretty_names[ind_idx],
                target_names[ind_idx],
                sensorize_time_ranges[[inner_idx]][1],
                sensorize_time_ranges[[inner_idx]][2])
      ) +
      # Zoom with coord_cartesian rather than ylim(): ylim() discards the
      # out-of-range slopes before stat_summary runs, so the median and MAD
      # lines would be computed on truncated data.
      coord_cartesian(ylim = unname(slope_limits))
    print(plt)
  }
}
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -6.99505 -0.07659  0.85530  1.79235  2.78705 22.67845 
##        1%       99% 
## -3.527572 14.757571
## Warning: Removed 188 rows containing non-finite values (stat_summary).

## Warning: Removed 188 rows containing non-finite values (stat_summary).

## Warning: Removed 188 rows containing non-finite values (stat_summary).
## Warning: Removed 188 rows containing missing values (geom_point).

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -5.9632 -0.0022  0.9907  1.9269  2.9005 22.6117 
##        1%       99% 
## -2.848045 14.762133
## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing non-finite values (stat_summary).

## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing missing values (geom_point).

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -4.14923  0.08725  1.13688  2.04799  3.00309 20.26235 
##        1%       99% 
## -2.483166 15.210452
## Warning: Removed 174 rows containing non-finite values (stat_summary).
## Warning: Removed 174 rows containing non-finite values (stat_summary).

## Warning: Removed 174 rows containing non-finite values (stat_summary).
## Warning: Removed 174 rows containing missing values (geom_point).

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -3.8580  0.1961  1.2541  2.1569  3.1310 20.8083 
##        1%       99% 
## -2.031058 15.452289
## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing non-finite values (stat_summary).

## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing missing values (geom_point).

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -3.6155  0.2806  1.3897  2.2576  3.2259 20.9600 
##        1%       99% 
## -1.973515 15.316266
## Warning: Removed 160 rows containing non-finite values (stat_summary).
## Warning: Removed 160 rows containing non-finite values (stat_summary).

## Warning: Removed 160 rows containing non-finite values (stat_summary).
## Warning: Removed 160 rows containing missing values (geom_point).

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -47.4643  -0.6898   5.2872  11.8956  20.9973 148.6280 
##        1%       99% 
## -25.83519  78.26310
## Warning: Removed 190 rows containing non-finite values (stat_summary).
## Warning: Removed 190 rows containing non-finite values (stat_summary).

## Warning: Removed 190 rows containing non-finite values (stat_summary).
## Warning: Removed 190 rows containing missing values (geom_point).

##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -39.52026  -0.08779   7.10494  13.85845  24.39977 150.64244 
##        1%       99% 
## -21.95926  81.65420
## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing non-finite values (stat_summary).

## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing missing values (geom_point).

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -37.7967   0.3934   9.1225  15.6121  27.1180 139.5317 
##        1%       99% 
## -18.91136  83.84794
## Warning: Removed 176 rows containing non-finite values (stat_summary).
## Warning: Removed 176 rows containing non-finite values (stat_summary).

## Warning: Removed 176 rows containing non-finite values (stat_summary).
## Warning: Removed 176 rows containing missing values (geom_point).

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -27.2519   0.7216  10.6485  17.1112  28.8532 132.2374 
##        1%       99% 
## -15.47654  84.35775
## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing non-finite values (stat_summary).

## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing missing values (geom_point).

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -29.285   1.306  12.542  18.500  30.662 125.651 
##        1%       99% 
## -14.46024  84.06255
## Warning: Removed 162 rows containing non-finite values (stat_summary).
## Warning: Removed 162 rows containing non-finite values (stat_summary).

## Warning: Removed 162 rows containing non-finite values (stat_summary).
## Warning: Removed 162 rows containing missing values (geom_point).

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -2.8710  0.2182  0.9772  1.1369  1.8059  9.1133 
##        1%       99% 
## -1.367857  4.983324
## Warning: Removed 190 rows containing non-finite values (stat_summary).
## Warning: Removed 190 rows containing non-finite values (stat_summary).

## Warning: Removed 190 rows containing non-finite values (stat_summary).
## Warning: Removed 190 rows containing missing values (geom_point).

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -2.5023  0.3095  1.0449  1.1708  1.8254  8.5906 
##        1%       99% 
## -1.223981  4.621978
## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing non-finite values (stat_summary).

## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing missing values (geom_point).

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -3.1501  0.4219  1.0847  1.1977  1.8257  7.8688 
##         1%        99% 
## -0.9932766  4.3220581
## Warning: Removed 176 rows containing non-finite values (stat_summary).
## Warning: Removed 176 rows containing non-finite values (stat_summary).

## Warning: Removed 176 rows containing non-finite values (stat_summary).
## Warning: Removed 176 rows containing missing values (geom_point).

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -2.2332  0.5277  1.1455  1.2276  1.8285  6.7969 
##         1%        99% 
## -0.6274733  4.0525981
## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing non-finite values (stat_summary).

## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing missing values (geom_point).

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -1.9483  0.6222  1.1853  1.2632  1.8353  6.3926 
##         1%        99% 
## -0.3992849  3.9132253
## Warning: Removed 162 rows containing non-finite values (stat_summary).
## Warning: Removed 162 rows containing non-finite values (stat_summary).

## Warning: Removed 162 rows containing non-finite values (stat_summary).
## Warning: Removed 162 rows containing missing values (geom_point).

##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max.      NA's 
## -39.80474  -0.00589   0.92305   1.64847   2.52858  49.86180         3 
##        1%       99% 
## -4.554879 15.847305
## Warning: Removed 181 rows containing non-finite values (stat_summary).
## Warning: Removed 181 rows containing non-finite values (stat_summary).

## Warning: Removed 181 rows containing non-finite values (stat_summary).
## Warning: Removed 181 rows containing missing values (geom_point).

##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max.      NA's 
## -53.72601   0.08357   1.06095   1.76913   2.72513  41.11893         2 
##        1%       99% 
## -4.142148 15.016964
## Warning: Removed 170 rows containing non-finite values (stat_summary).
## Warning: Removed 170 rows containing non-finite values (stat_summary).

## Warning: Removed 170 rows containing non-finite values (stat_summary).
## Warning: Removed 170 rows containing missing values (geom_point).

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
## -11.1694   0.1807   1.1951   1.8689   2.9219  36.4991        2 
##        1%       99% 
## -3.950131 14.224453
## Warning: Removed 166 rows containing non-finite values (stat_summary).
## Warning: Removed 166 rows containing non-finite values (stat_summary).

## Warning: Removed 166 rows containing non-finite values (stat_summary).
## Warning: Removed 166 rows containing missing values (geom_point).

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -9.686   0.246   1.323   1.964   3.091  33.875 
##        1%       99% 
## -3.905274 14.143066
## Warning: Removed 156 rows containing non-finite values (stat_summary).
## Warning: Removed 156 rows containing non-finite values (stat_summary).

## Warning: Removed 156 rows containing non-finite values (stat_summary).
## Warning: Removed 156 rows containing missing values (geom_point).

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -11.018   0.335   1.441   2.055   3.221  33.579 
##       1%      99% 
## -3.21255 13.60217
## Warning: Removed 148 rows containing non-finite values (stat_summary).
## Warning: Removed 148 rows containing non-finite values (stat_summary).

## Warning: Removed 148 rows containing non-finite values (stat_summary).
## Warning: Removed 148 rows containing missing values (geom_point).

##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max.      NA's 
## -1.803954 -0.003612  0.013962  0.021795  0.041723  1.095137         3 
##         1%        99% 
## -0.1736429  0.2959707
## Warning: Removed 181 rows containing non-finite values (stat_summary).
## Warning: Removed 181 rows containing non-finite values (stat_summary).

## Warning: Removed 181 rows containing non-finite values (stat_summary).
## Warning: Removed 181 rows containing missing values (geom_point).

##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max.      NA's 
## -1.370046 -0.001546  0.016001  0.024139  0.042797  1.095137         2 
##         1%        99% 
## -0.1384968  0.3026368
## Warning: Removed 172 rows containing non-finite values (stat_summary).
## Warning: Removed 172 rows containing non-finite values (stat_summary).

## Warning: Removed 172 rows containing non-finite values (stat_summary).
## Warning: Removed 172 rows containing missing values (geom_point).

##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max.       NA's 
## -0.4333028 -0.0002924  0.0171806  0.0256139  0.0436638  0.8118412          2 
##         1%        99% 
## -0.1270568  0.2893027
## Warning: Removed 166 rows containing non-finite values (stat_summary).
## Warning: Removed 166 rows containing non-finite values (stat_summary).

## Warning: Removed 166 rows containing non-finite values (stat_summary).
## Warning: Removed 166 rows containing missing values (geom_point).

##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.438961  0.000788  0.019057  0.027058  0.044112  0.607658 
##         1%        99% 
## -0.1350308  0.3159032
## Warning: Removed 156 rows containing non-finite values (stat_summary).
## Warning: Removed 156 rows containing non-finite values (stat_summary).

## Warning: Removed 156 rows containing non-finite values (stat_summary).
## Warning: Removed 156 rows containing missing values (geom_point).

##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.393282  0.003056  0.020350  0.029035  0.044812  0.602471 
##         1%        99% 
## -0.1002845  0.2888718
## Warning: Removed 150 rows containing non-finite values (stat_summary).
## Warning: Removed 150 rows containing non-finite values (stat_summary).

## Warning: Removed 150 rows containing non-finite values (stat_summary).
## Warning: Removed 150 rows containing missing values (geom_point).

# Quantiles of the fitted intercepts that bound the visible y-range of each
# plot.
QUANTS <- c(0.01, 0.99)

# TODO: Add more "core indicators"

# For every (indicator, target) pairing and every training window, print a
# summary of the fitted intercepts and plot them over time, together with
# their per-day median and median +/- MAD.
for (ind_idx in seq_along(source_names)) {
  # Only the cached sensorized values (which carry the fitted intercept per
  # location and date) are needed here.
  sensorize_val_fname <- sprintf('results/12_sensorize_vals_%s_%s_%s_%s.RDS',
                                 geo_level,
                                 source_names[ind_idx], signal_names[ind_idx],
                                 target_names[ind_idx])
  sensorized_vals <- readRDS(sensorize_val_fname)

  for (inner_idx in seq_along(sensorize_time_ranges)) {
    sv <- sensorized_vals[[inner_idx]]
    print(summary(sv$intercept))
    print(intercept_limits <- quantile(sv$intercept, QUANTS, na.rm = TRUE))
    plt <- ggplot(sv, aes(x = time_value, y = intercept)) +
      geom_point(alpha = 0.1, size = 0.5) +
      geom_hline(yintercept = 0, colour = 'white') +
      # group = 1 joins the per-day summaries into a single line.
      stat_summary(
        aes(group = 1, colour = 'median'),
        fun = median,
        geom = "line"
      ) +
      stat_summary(
        aes(group = 1, colour = '+/- mad'),
        fun = function(x) { median(x) + mad(x) },
        geom = "line"
      ) +
      stat_summary(
        aes(group = 1, colour = '+/- mad'),
        fun = function(x) { median(x) - mad(x) },
        geom = "line"
      ) +
      scale_colour_manual(
        values = c("median" = "maroon",
                   "+/- mad" = "darkgreen")
      ) +
      labs(colour = '') +
      ggtitle(
        sprintf("Intercept distribution for %s[%s], fitted on t in %d:%d",
                pretty_names[ind_idx],
                target_names[ind_idx],
                sensorize_time_ranges[[inner_idx]][1],
                sensorize_time_ranges[[inner_idx]][2])
      ) +
      # Zoom with coord_cartesian rather than ylim(): ylim() discards the
      # out-of-range intercepts before stat_summary runs, so the median and
      # MAD lines would be computed on truncated data.
      coord_cartesian(ylim = unname(intercept_limits))
    print(plt)
  }
}
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -85.22700   0.06234   3.65336   4.60559   9.02520  92.78432 
##        1%       99% 
## -38.51123  48.60214
## Warning: Removed 188 rows containing non-finite values (stat_summary).

## Warning: Removed 188 rows containing non-finite values (stat_summary).

## Warning: Removed 188 rows containing non-finite values (stat_summary).
## Warning: Removed 188 rows containing missing values (geom_point).

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -87.8276  -0.1411   3.2904   4.0433   8.6604  89.6721 
##        1%       99% 
## -35.55893  40.42421
## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing non-finite values (stat_summary).

## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing missing values (geom_point).

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -79.3462  -0.4356   2.9715   3.5629   8.3476  72.0629 
##        1%       99% 
## -30.36604  33.38218
## Warning: Removed 174 rows containing non-finite values (stat_summary).
## Warning: Removed 174 rows containing non-finite values (stat_summary).

## Warning: Removed 174 rows containing non-finite values (stat_summary).
## Warning: Removed 174 rows containing missing values (geom_point).

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -77.589  -0.649   2.753   3.109   7.952  51.441 
##        1%       99% 
## -29.48563  28.49836
## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing non-finite values (stat_summary).

## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing missing values (geom_point).

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -67.4429  -0.9262   2.4858   2.6871   7.4899  49.1192 
##        1%       99% 
## -29.18673  25.69813
## Warning: Removed 160 rows containing non-finite values (stat_summary).
## Warning: Removed 160 rows containing non-finite values (stat_summary).

## Warning: Removed 160 rows containing non-finite values (stat_summary).
## Warning: Removed 160 rows containing missing values (geom_point).

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -60.3477   0.6356   5.0620   5.1291  10.5415  60.3333 
##        1%       99% 
## -31.39289  36.57134
## Warning: Removed 190 rows containing non-finite values (stat_summary).
## Warning: Removed 190 rows containing non-finite values (stat_summary).

## Warning: Removed 190 rows containing non-finite values (stat_summary).
## Warning: Removed 190 rows containing missing values (geom_point).

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -61.948  -0.293   4.345   3.781   9.576  47.869 
##        1%       99% 
## -32.05857  32.16370
## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing non-finite values (stat_summary).

## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing missing values (geom_point).

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -64.570  -1.543   3.757   2.660   8.681  50.328 
##        1%       99% 
## -32.85714  29.26475
## Warning: Removed 176 rows containing non-finite values (stat_summary).
## Warning: Removed 176 rows containing non-finite values (stat_summary).

## Warning: Removed 176 rows containing non-finite values (stat_summary).
## Warning: Removed 176 rows containing missing values (geom_point).

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -56.555  -2.717   3.126   1.763   7.986  41.071 
##        1%       99% 
## -32.24887  26.76281
## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing non-finite values (stat_summary).

## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing missing values (geom_point).

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -53.0221  -3.8175   2.5191   0.9694   7.4169  39.2552 
##        1%       99% 
## -31.92991  25.29219
## Warning: Removed 162 rows containing non-finite values (stat_summary).
## Warning: Removed 162 rows containing non-finite values (stat_summary).

## Warning: Removed 162 rows containing non-finite values (stat_summary).
## Warning: Removed 162 rows containing missing values (geom_point).

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -269.928  -22.600   -7.985  -13.527    1.961   75.513 
##         1%        99% 
## -130.44641   34.23281
## Warning: Removed 190 rows containing non-finite values (stat_summary).
## Warning: Removed 190 rows containing non-finite values (stat_summary).

## Warning: Removed 190 rows containing non-finite values (stat_summary).
## Warning: Removed 190 rows containing missing values (geom_point).

##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -244.4186  -22.6611   -9.1787  -13.9614    0.8268   63.4465 
##         1%        99% 
## -115.11382   31.26952
## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing non-finite values (stat_summary).

## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing missing values (geom_point).

##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
## -227.53407  -22.54198   -9.79478  -14.17981    0.04779   65.54112 
##        1%       99% 
## -99.28330  29.07741
## Warning: Removed 176 rows containing non-finite values (stat_summary).
## Warning: Removed 176 rows containing non-finite values (stat_summary).

## Warning: Removed 176 rows containing non-finite values (stat_summary).
## Warning: Removed 176 rows containing missing values (geom_point).

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -192.637  -22.553  -10.593  -14.460   -1.005   52.440 
##        1%       99% 
## -88.08429  24.74444
## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing non-finite values (stat_summary).

## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing missing values (geom_point).

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -179.451  -22.544  -11.244  -14.913   -2.342   47.856 
##        1%       99% 
## -80.06907  18.63802
## Warning: Removed 162 rows containing non-finite values (stat_summary).
## Warning: Removed 162 rows containing non-finite values (stat_summary).

## Warning: Removed 162 rows containing non-finite values (stat_summary).
## Warning: Removed 162 rows containing missing values (geom_point).

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -95.994   2.123   5.344   6.974  10.556  91.850 
##        1%       99% 
## -17.84004  39.63661
## Warning: Removed 178 rows containing non-finite values (stat_summary).
## Warning: Removed 178 rows containing non-finite values (stat_summary).

## Warning: Removed 178 rows containing non-finite values (stat_summary).
## Warning: Removed 178 rows containing missing values (geom_point).

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -51.923   1.997   5.148   6.543   9.907  91.850 
##        1%       99% 
## -14.88075  33.94944
## Warning: Removed 170 rows containing non-finite values (stat_summary).
## Warning: Removed 170 rows containing non-finite values (stat_summary).

## Warning: Removed 170 rows containing non-finite values (stat_summary).
## Warning: Removed 170 rows containing missing values (geom_point).

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -39.315   1.872   4.797   6.162   9.438 103.884 
##        1%       99% 
## -12.60972  33.16308
## Warning: Removed 164 rows containing non-finite values (stat_summary).
## Warning: Removed 164 rows containing non-finite values (stat_summary).

## Warning: Removed 164 rows containing non-finite values (stat_summary).
## Warning: Removed 164 rows containing missing values (geom_point).

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -37.107   1.758   4.517   5.836   8.995  98.408 
##        1%       99% 
## -11.74648  31.21223
## Warning: Removed 156 rows containing non-finite values (stat_summary).
## Warning: Removed 156 rows containing non-finite values (stat_summary).

## Warning: Removed 156 rows containing non-finite values (stat_summary).
## Warning: Removed 156 rows containing missing values (geom_point).

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -39.296   1.599   4.294   5.524   8.619  98.408 
##        1%       99% 
## -10.47504  28.78262
## Warning: Removed 150 rows containing non-finite values (stat_summary).
## Warning: Removed 150 rows containing non-finite values (stat_summary).

## Warning: Removed 150 rows containing non-finite values (stat_summary).
## Warning: Removed 150 rows containing missing values (geom_point).

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.70649  0.03892  0.11684  0.17133  0.22964  2.14846 
##        1%       99% 
## -0.360243  1.359333
## Warning: Removed 178 rows containing non-finite values (stat_summary).
## Warning: Removed 178 rows containing non-finite values (stat_summary).

## Warning: Removed 178 rows containing non-finite values (stat_summary).
## Warning: Removed 178 rows containing missing values (geom_point).

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.61498  0.03942  0.11333  0.16214  0.22129  1.71702 
##         1%        99% 
## -0.2694737  1.1420245
## Warning: Removed 170 rows containing non-finite values (stat_summary).
## Warning: Removed 170 rows containing non-finite values (stat_summary).

## Warning: Removed 170 rows containing non-finite values (stat_summary).
## Warning: Removed 170 rows containing missing values (geom_point).

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.65464  0.03788  0.11213  0.15431  0.21321  2.60893 
##         1%        99% 
## -0.2440557  0.9950797
## Warning: Removed 164 rows containing non-finite values (stat_summary).
## Warning: Removed 164 rows containing non-finite values (stat_summary).

## Warning: Removed 164 rows containing non-finite values (stat_summary).
## Warning: Removed 164 rows containing missing values (geom_point).

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.63622  0.03826  0.11140  0.14786  0.20371  2.17876 
##         1%        99% 
## -0.2622776  0.8975714
## Warning: Removed 156 rows containing non-finite values (stat_summary).
## Warning: Removed 156 rows containing non-finite values (stat_summary).

## Warning: Removed 156 rows containing non-finite values (stat_summary).
## Warning: Removed 156 rows containing missing values (geom_point).

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.50874  0.03885  0.10910  0.14033  0.19236  2.17876 
##         1%        99% 
## -0.2218097  0.7774261
## Warning: Removed 150 rows containing non-finite values (stat_summary).
## Warning: Removed 150 rows containing non-finite values (stat_summary).

## Warning: Removed 150 rows containing non-finite values (stat_summary).
## Warning: Removed 150 rows containing missing values (geom_point).